# Packages
# ==============================================================================
library(tidyr)
library(ggplot2)
library(knitr)
library(DT)
library(stringr)
library(xtable)
library(hrbrthemes)
library(dplyr)
library(forcats)
library(finalfit)
library(dplyr)
library(stringr)
library(janitor)
library(lme4)
library(extrafont)
library(cowplot)
library(knitr)
library(parameters)
library(here)
# Load fonts
extrafont::loadfonts(quiet = TRUE)
# Load data
# ==============================================================================
bats_res = readRDS("data/Ethiopia_bat-info_test-summary.RDS")
bats_res$SiteName = trimws(gsub("-\\<Bat\\>|\\<Bat\\>", "", bats_res$SiteName))
bats_res$ConcurrentSamplingSite = bats_res$SiteName
tests = readRDS("data/Ethiopia_bat-test-info.RDS")
tests$SiteName = trimws(gsub("-\\<Bat\\>|\\<Bat\\>", "", tests$SiteName))
tests$Interface = ifelse(tests$ScientificName %in% "Rhinopoma hardwickii",
"Cave", "Building")
events = readRDS("data/Ethiopia_bat-sampling-events.RDS")
df = bats_res
df$Family = str_to_title(df$Family)
df$AgeClass = word(df$AgeClass, 1)
df$Interface = ifelse(df$ScientificName %in% "Rhinopoma hardwickii",
"Cave", "Building")
date_order = unique(format(as.Date(sort(df$EventDate), format = "%Y-%m-%d"),
"%Y-%B"))
df$EventMY = format(as.Date(df$EventDate, format = "%Y-%m-%d"), "%Y-%B")
df$EventMY = factor(df$EventMY, levels = date_order, ordered = TRUE)
tests = left_join(tests, df[ , c("AnimalID", "Family")], by = "AnimalID")df %>%
group_by(SiteName, EventDate, SeasonModelled) %>%
tally(name = "No. of bats sampled") %>%
group_by(EventDate) %>%
arrange(SiteName, EventDate) %>%
adorn_totals("row") %>%
datatable(options = list(dom = 't'), rownames = FALSE)df %>%
select(Family, ScientificName) %>%
group_by(Family, ScientificName) %>%
tally() %>%
adorn_totals("row") %>%
datatable(options = list(dom = 't'), rownames = FALSE)Number of bats tested = 589
Number of bats testing positive = 99
Percent bats testing positive = 16.8081494
Made with ArcGIS 10.1
out_file = file.path(here(), "manuscript_figures")
myimages = list.files(out_file, pattern = glob2rx("Figure-1*png"),
full.names = TRUE)
include_graphics(myimages)p_df = df %>%
group_by(EventMY, is_positive) %>%
arrange(EventMY) %>%
summarize(n = n()) %>%
mutate(is_positive = ifelse(is_positive, "Virus Positive", "Virus Negative")) %>%
mutate(is_positive = factor(is_positive, levels = c("Virus Positive", "Virus Negative")))
levels(p_df$EventMY) = gsub("-", "\n", levels(p_df$EventMY))
event_plot = ggplot(p_df, aes(x = EventMY, y = n, fill = is_positive, label = n)) +
geom_bar(stat="identity", width = 0.5) +
scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "Test result") +
theme_ipsum(base_size = 12) +
ylab("Number of bats") +
xlab("") +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid = element_blank(),
panel.border = element_blank(),
legend.position = "top") +
scale_y_continuous(expand = c(0,1))
p_season_df = df %>%
group_by(EventMY, SeasonModelled) %>%
arrange(EventMY) %>%
summarize(n = n())
levels(p_season_df$EventMY) = gsub("-", "\n", levels(p_season_df$EventMY))
event_plot = event_plot + geom_text(data = p_season_df,
aes(label = SeasonModelled,
x = EventMY, y = n,
fill = NULL),
nudge_y = 2,
size = 3)
cowplot::save_plot("results/figs/event-plot.pdf", event_plot, base_height = 6)
cowplot::save_plot("results/figs/event-plot.png", event_plot, base_height = 6)
event_plotbat_plot = df %>%
select(ScientificName, is_positive) %>%
group_by(ScientificName, is_positive) %>%
summarize(n = n()) %>%
arrange(desc(n)) %>%
as.data.frame %>%
mutate(ScientificName = factor(ScientificName,
levels = c(
"Neoromicia cf. somalica",
"Mops midas",
"Chaerephon pumilus",
"Rhinopoma hardwickii"
))) %>%
ggplot( aes(x = ScientificName, y= -n,
fill = as.factor(-is_positive), width = 0.75)) +
geom_bar(stat="identity") +
scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "") +
scale_x_discrete(name = "", position = "top") +
scale_y_continuous(name = "No. of bats",
breaks = seq(0, -300, by = -50), # y axis values (before coord_flip)
labels = seq(0, 300, by = 50)) +
coord_flip() +
theme_ipsum(base_size = 12, plot_title_size = 12) +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none") +
xlab("") +
ylab("Number of bats") +
labs(title = "Species tested")
cowplot::save_plot("results/figs/bat-plot.pdf", bat_plot, base_height = 3)
bat_plotsp_df = df %>%
select(ScientificName, is_positive, EventDate) %>%
group_by(ScientificName, is_positive, EventDate) %>%
summarize(n = n()) %>%
arrange(desc(n)) %>%
mutate(sp_e = paste0(EventDate, "_", ScientificName)) %>%
as.data.frame
sp_l = split(sp_df, sp_df$sp_e)
# ref:
# http://www.sthda.com/english/wiki/ggplot2-pie-chart-quick-start-guide-r-software-and-data-visualization
blank_theme = theme_minimal()+
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold"),
legend.position = "none"
)
out_dir = "results/figs/event_pie"
if( !dir.exists(out_dir)) dir.create(out_dir)
invisible(lapply(sp_l, function(sp, out_dir) {
if(nrow(sp) > 1) {
sp_p = sp %>%
ggplot(aes(x = "", y = n, fill = as.factor(-is_positive))) +
geom_bar(width = 1, stat="identity") +
scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "") +
coord_polar("y", start=0) +
theme_minimal() +
blank_theme
} else {
sp_p = sp %>%
ggplot(aes(x = "", y = n, fill = "#B9B7B6")) +
geom_bar(width = 1, stat="identity") +
scale_fill_manual(values = "#2C42BD", name = "") +
coord_polar("y", start=0) +
theme_minimal() +
blank_theme
}
out_name_pdf = file.path(out_dir, paste0(gsub(" ", "_", unique(sp$sp_e)), ".pdf"))
out_name_png = file.path(out_dir, paste0(gsub(" ", "_", unique(sp$sp_e)), ".png"))
cowplot::save_plot(out_name_pdf, sp_p, base_height = .5)
cowplot::save_plot(out_name_png, sp_p, base_height = .5)
}, out_dir))out_dir = file.path(here(), "results/figs/event_pie")
myimages = list.files(out_dir, pattern = ".png", full.names = TRUE)
include_graphics(myimages)out_file = file.path(here(), "manuscript_figures")
myimages = list.files(out_file, pattern = glob2rx("Figure-2*png"),
full.names = TRUE)
include_graphics(myimages)tbl = xtable(table(unlist(strsplit(df$pos_virus[ df$is_positive ], "; "))))
names(tbl) = "No. of bats testing positive for virus"
datatable(tbl)df %>%
filter( is_positive & pos_virus_n > 1 ) %>%
select(SiteName, ScientificName, pos_virus) %>%
group_by(SiteName, ScientificName, pos_virus) %>%
summarize(n_bats = n()) %>%
datatable(colnames = c("SiteName", "ScientificName", "Virus coinfection",
"No. of bats"))Rectal swabs were the most common specimen type to yield a positive virus sequence (92/99 bats), including 27 bats from whom sequences were confirmed in both rectal and oral swabs. In addition, seven bats had viruses detected only via their oral swabs.
df %>%
filter(is_positive) %>%
group_by(pos_specimen_type) %>%
tally() %>%
adorn_totals("row") %>%
datatable(options = list(dom = 't'))ids = df$AnimalID[ which(df$pos_virus_n > 1) ]
multi_bats = unique(
tests[ tests$AnimalID %in% ids & !is.na(tests$VirusGroup),
c("AnimalID", "SpecimenType", "VirusGroup")]
)
tbls = vector("list", 3)
for(i in seq_along(ids)) {
tbls[[ i ]] =
multi_bats %>%
filter(AnimalID %in% ids[ i ]) %>%
datatable(options = list(dom = 't'))
}
tbls[[1]]tbls[[2]]tbls[[3]]df %>%
filter(is_positive) %>%
select(pos_virus, pos_specimen_type) %>%
group_by(pos_virus, pos_specimen_type) %>%
tally() %>%
as_data_frame() %>%
spread(pos_specimen_type, n) %>%
adorn_totals(c("row", "col")) %>%
datatable(options = list(dom = 't'))# Crosstable
dependent = "is_positive"
explanatory = c("Sex",
"AgeClass",
"SeasonModelled")
df %>%
filter(ScientificName %in% "Rhinopoma hardwickii") %>%
summary_factorlist(dependent, explanatory,
p=TRUE,
add_dependent_label=TRUE,
p_cat = "fisher" # Fisher's Exact test
) %>%
knitr::kable(align=c("l", "l", "r", "r", "r"))| Dependent: is_positive | FALSE | TRUE | p | |
|---|---|---|---|---|
| Sex | female | 77 (75.5) | 49 (68.1) | 0.305 |
| male | 25 (24.5) | 23 (31.9) | ||
| AgeClass | adult | 83 (81.4) | 59 (81.9) | 1.000 |
| subadult | 19 (18.6) | 13 (18.1) | ||
| SeasonModelled | Dry | 10 (9.8) | 0.006 | |
| Wet | 92 (90.2) | 72 (100.0) |
# Crosstable
dependent = "is_positive"
explanatory = c(
"Sex",
"AgeClass",
"SeasonModelled")
df %>%
filter(ScientificName %in% "Chaerephon pumilus") %>%
summary_factorlist(dependent, explanatory,
p=TRUE,
add_dependent_label=TRUE,
p_cat = "fisher" # Fisher's Exact test
) %>%
knitr::kable(align=c("l", "l", "r", "r", "r"))| Dependent: is_positive | FALSE | TRUE | p | |
|---|---|---|---|---|
| Sex | female | 133 (62.1) | 8 (57.1) | 0.779 |
| male | 81 (37.9) | 6 (42.9) | ||
| AgeClass | adult | 194 (90.7) | 10 (71.4) | 0.046 |
| subadult | 20 (9.3) | 4 (28.6) | ||
| SeasonModelled | Dry | 36 (16.8) | 2 (14.3) | 1.000 |
| Wet | 178 (83.2) | 12 (85.7) |
# Crosstable
dependent = "is_positive"
explanatory = c(
"Sex",
# "AgeClass", # there are no subadult for Mops
"SeasonModelled")
df %>%
filter(ScientificName %in% "Mops midas") %>%
summary_factorlist(dependent, explanatory,
p=TRUE,
add_dependent_label=TRUE,
p_cat = "fisher" # Fisher's Exact test
) %>%
knitr::kable(align=c("l", "l", "r", "r", "r"))| Dependent: is_positive | FALSE | TRUE | p | |
|---|---|---|---|---|
| Sex | female | 128 (76.2) | 8 (66.7) | 0.491 |
| male | 40 (23.8) | 4 (33.3) | ||
| SeasonModelled | Dry | 59 (35.1) | 3 (25.0) | 0.549 |
| Wet | 109 (64.9) | 9 (75.0) |
# Crosstable
dependent = "is_positive"
explanatory = c(
"Sex"
# "AgeClass"
# "SeasonModelled"
)
df %>%
filter(ScientificName %in% "Neoromicia cf. somalica") %>%
summary_factorlist(dependent, explanatory,
p=TRUE,
add_dependent_label=TRUE,
p_cat = "fisher" # Fisher's Exact test
) %>%
knitr::kable(align=c("l", "l", "r", "r", "r"))| Dependent: is_positive | FALSE | TRUE | p | |
|---|---|---|---|---|
| Sex | female | 5 (83.3) | 1 (100.0) | 1.000 |
| male | 1 (16.7) |
df_filter = df %>%
filter(! ScientificName %in% c("Neoromicia cf. somalica", "Mops midas"))
m1 = glm(is_positive ~ Sex + AgeClass + SeasonModelled + ScientificName,
data = df_filter, family = binomial)
# summary(m1)
print_md(model_parameters(m1, exponentiate = TRUE))| Parameter | Odds Ratio | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | 0.02 | 0.01 | (2.30e-03, 0.06) | -5.37 | < .001 |
| Sex (male) | 1.31 | 0.38 | (0.73, 2.33) | 0.92 | 0.359 |
| AgeClass (subadult) | 1.23 | 0.44 | (0.60, 2.45) | 0.58 | 0.562 |
| SeasonModelled (Wet) | 4.33 | 3.30 | (1.20, 27.83) | 1.92 | 0.055 |
| ScientificName (Rhinopoma hardwickii) | 10.22 | 3.30 | (5.59, 19.93) | 7.21 | < .001 |
df_filter = df %>%
filter(! ScientificName %in% c("Neoromicia cf. somalica"))
m3 = glm(is_positive ~ Sex + SeasonModelled + ScientificName,
data = df_filter, family = binomial)
# summary(m3)
print_md(model_parameters(m3, exponentiate = TRUE))| Parameter | Odds Ratio | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | 0.02 | 0.01 | (7.21e-03, 0.06) | -6.85 | < .001 |
| Sex (male) | 1.35 | 0.36 | (0.80, 2.27) | 1.13 | 0.260 |
| SeasonModelled (Wet) | 2.73 | 1.37 | (1.11, 8.23) | 2.01 | 0.044 |
| ScientificName (Mops midas) | 1.31 | 0.54 | (0.57, 2.95) | 0.66 | 0.512 |
| ScientificName (Rhinopoma hardwickii) | 10.53 | 3.38 | (5.78, 20.49) | 7.34 | < .001 |
glm(is_positive ~ Sex + AgeClass + SeasonModelled + ScientificName,
data = df_filter, family = binomial) %>%
model_parameters(exponentiate = TRUE) %>%
print_md()| Parameter | Odds Ratio | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | 0.02 | 0.01 | (7.15e-03, 0.06) | -6.86 | < .001 |
| Sex (male) | 1.35 | 0.36 | (0.80, 2.28) | 1.13 | 0.259 |
| AgeClass (subadult) | 1.26 | 0.45 | (0.62, 2.50) | 0.64 | 0.522 |
| SeasonModelled (Wet) | 2.67 | 1.34 | (1.09, 8.06) | 1.96 | 0.050 |
| ScientificName (Mops midas) | 1.35 | 0.56 | (0.59, 3.05) | 0.71 | 0.475 |
| ScientificName (Rhinopoma hardwickii) | 10.39 | 3.34 | (5.69, 20.23) | 7.28 | < .001 |
dd =
df %>%
group_by(EventMY, EventDate, SeasonModelled, ConcurrentSamplingSite) %>%
tally() %>%
group_by(EventMY) %>%
mutate(id = 1:n()) %>%
arrange(EventDate)
df_filter = left_join(df_filter, dd, by = c("EventDate", "EventMY"))